package org.calrissian.flowbox; import backtype.storm.topology.IRichBolt; import backtype.storm.topology.IRichSpout; import backtype.storm.topology.OutputFieldsDeclarer; import backtype.storm.topology.TopologyBuilder; import backtype.storm.tuple.Fields; import org.calrissian.flowbox.bolt.*; import org.calrissian.flowbox.spout.TickSpout; import static org.calrissian.flowbox.Constants.*; import static org.calrissian.flowbox.model.AggregateOp.AGGREGATE; import static org.calrissian.flowbox.model.EachOp.EACH; import static org.calrissian.flowbox.model.FilterOp.FILTER; import static org.calrissian.flowbox.model.JoinOp.JOIN; import static org.calrissian.flowbox.model.PartitionOp.PARTITION; import static org.calrissian.flowbox.model.SelectOp.SELECT; import static org.calrissian.flowbox.model.StopGateOp.STOP_GATE; import static org.calrissian.flowbox.spout.MockFlowLoaderSpout.FLOW_LOADER_STREAM; /** * Builds the base flowbox topology configuration. The topology builder is returned so that it can be further * customized. Most often, it will be useful to further provision a downstream bolt that will process the data * even after the output. The output stream and component id provisioned on the output of the builder are both * "output". */ public class FlowboxFactory { private IRichSpout ruleSpout; private IRichSpout eventsSpout; private IRichBolt outputBolt; private int parallelismHint; /** * @param ruleSpout A spout that feeds rules into flowbox. This just needs to emit a Collection<Flow> in the tuple * at index 0 with a field name of "flows". * @param eventsSpout A spout that provides the events to std input. * @param outputBolt A bolt to accept the output events (with the field name "event") * @param parallelismHint The number of executors to run the parallel streams. */ public FlowboxFactory(IRichSpout ruleSpout, IRichSpout eventsSpout, IRichBolt outputBolt, int parallelismHint) { this.ruleSpout = ruleSpout; this.eventsSpout = eventsSpout; this.outputBolt = outputBolt; this.parallelismHint = parallelismHint; } /** * @return A topology builder than can further be customized. */ public TopologyBuilder createFlowbox() { TopologyBuilder builder = new TopologyBuilder(); builder.setSpout(EVENT, eventsSpout, 1); builder.setSpout(FLOW_LOADER_STREAM, ruleSpout, 1); builder.setSpout("tick", new TickSpout(1000), 1); builder.setBolt(INITIALIZER, new FlowInitializerBolt(), parallelismHint) // kicks off a flow determining where to start .shuffleGrouping(EVENT) .allGrouping(FLOW_LOADER_STREAM, FLOW_LOADER_STREAM); declarebolt(builder, FILTER, new FilterBolt(), parallelismHint); declarebolt(builder, SELECT, new SelectorBolt(), parallelismHint); declarebolt(builder, PARTITION, new PartitionBolt(), parallelismHint); declarebolt(builder, STOP_GATE, new StopGateBolt(), parallelismHint); declarebolt(builder, AGGREGATE, new AggregatorBolt(), parallelismHint); declarebolt(builder, JOIN, new JoinBolt(), parallelismHint); declarebolt(builder, EACH, new EachBolt(), parallelismHint); declarebolt(builder, OUTPUT, outputBolt, parallelismHint); return builder; } public FlowboxFactory() { } private static void declarebolt(TopologyBuilder builder, String boltName, IRichBolt bolt, int parallelism) { builder.setBolt(boltName, bolt, parallelism) .allGrouping(FLOW_LOADER_STREAM, FLOW_LOADER_STREAM) .allGrouping("tick", "tick") .localOrShuffleGrouping(INITIALIZER, boltName) .localOrShuffleGrouping(FILTER, boltName) .fieldsGrouping(PARTITION, boltName, new Fields(FLOW_ID, PARTITION)) // guaranteed partitions will always group the same flow for flows that have joins with default partitions. .localOrShuffleGrouping(AGGREGATE, boltName) .localOrShuffleGrouping(SELECT, boltName) .localOrShuffleGrouping(EACH, boltName) .localOrShuffleGrouping(STOP_GATE, boltName) .localOrShuffleGrouping(JOIN, boltName); } public static void declareOutputStreams(OutputFieldsDeclarer declarer) { Fields fields = new Fields(FLOW_ID, EVENT, FLOW_OP_IDX, STREAM_NAME, LAST_STREAM); declarer.declareStream(PARTITION, fields); declarer.declareStream(FILTER, fields); declarer.declareStream(SELECT, fields); declarer.declareStream(AGGREGATE, fields); declarer.declareStream(STOP_GATE, fields); declarer.declareStream(JOIN, fields); declarer.declareStream(EACH, fields); declarer.declareStream(OUTPUT, fields); } public static void declarePartitionedOutputStreams(OutputFieldsDeclarer declarer) { Fields fields = new Fields(FLOW_ID, EVENT, FLOW_OP_IDX, STREAM_NAME, PARTITION, LAST_STREAM); declarer.declareStream(PARTITION, fields); declarer.declareStream(FILTER, fields); declarer.declareStream(SELECT, fields); declarer.declareStream(AGGREGATE, fields); declarer.declareStream(STOP_GATE, fields); declarer.declareStream(EACH, fields); declarer.declareStream(JOIN, fields); declarer.declareStream(OUTPUT, fields); } }